2  Extract Raw Data from TCGA and GTEx

2.1 Load Packages

library(TransProR)
Registered S3 methods overwritten by 'ggalt':
  method                  from   
  grid.draw.absoluteGrob  ggplot2
  grobHeight.absoluteGrob ggplot2
  grobWidth.absoluteGrob  ggplot2
  grobX.absoluteGrob      ggplot2
  grobY.absoluteGrob      ggplot2
            _______ _____            _   _  _____ _____  _____   ____  _____  
           |__   __|  __ \     /\   | \ | |/ ____|  __ \|  __ \ / __ \|  __ \ 
              | |  | |__) |   /  \  |  \| | (___ | |__) | |__) | |  | | |__) |
              | |  |  _  /   / /\ \ | . ` |\___ \|  ___/|  _  /| |  | |  _  / 
              | |  | | \ \  / ____ \| |\  |____) | |    | | \ \| |__| | | \ \ 
              |_|  |_|  \_\/_/    \_\_| \_|_____/|_|    |_|  \_\\____/|_|  \_\
                                                                   
                                                                   
                                                            
                                                            
         _    _ _______       _____ _____ _______     _________     _______ _____ _____ 
    /\  | |  | |__   __|     / ____/ ____/ ____\ \   / /  __ \ \   / / ____/ ____/ ____|
   /  \ | |  | |  | |       | (___| (___| (___  \ \_/ /| |  | \ \_/ / (___| (___| (___  
  / /\ \| |  | |  | |        \___ \\___ \\___ \  \   / | |  | |\   / \___ \\___ \\___ \ 
 / ____ \ |__| |  | |        ____) |___) |___) |  | |  | |__| | | |  ____) |___) |___) |
/_/    \_\____/   |_|       |_____/_____/_____/   |_|  |_____/  |_| |_____/_____/_____/ 
                                                                                        
                                                                                        
print(paste("TransProR version:", packageVersion("TransProR")))
[1] "TransProR version: 0.0.1"
library(grid)

2.2 Seek GTEx Organ

SeekGtexOrgan <- seek_gtex_organ(path = "../test_TransProR/download_data/GTEX_phenotype")
print(SeekGtexOrgan)

 <not provided>  Adipose Tissue   Adrenal Gland         Bladder           Blood 
              5             621             161              13             595 
   Blood Vessel     Bone Marrow           Brain          Breast    Cervix Uteri 
            753             102            1426             221              11 
          Colon       Esophagus  Fallopian Tube           Heart          Kidney 
            384             805               7             493              38 
          Liver            Lung          Muscle           Nerve           Ovary 
            141             381             478             335             112 
       Pancreas       Pituitary        Prostate  Salivary Gland            Skin 
            203             126             122              71             977 
Small Intestine          Spleen         Stomach          Testis         Thyroid 
            106             121             209             208             366 
         Uterus          Vagina 
             93              99 

2.3 Get TCGA Expression Data

2.3.1 SKCM-Skin-TCGA

SKCM_exp <- get_tcga_exp(
  counts_file_path = "../test_TransProR/download_data/TCGA-SKCM.htseq_counts.tsv", 
  gene_probes_file_path = "../test_TransProR/download_data/TCGA_gencode.v22.annotation.gene.probeMap",
  phenotype_file_path = "../test_TransProR/download_data/TCGA-skcm.GDC_phenotype.tsv", 
  output_file_path = '../test_TransProR/generated_data1/SKCM_Skin_TCGA_exp.rds'
)
Number of 'Primary Tumor' or 'Metastatic' samples:  474 
Number of 'normal' samples: 3 
head(SKCM_exp[["tumor_tcga_data"]])[1:5, 1:5]
         TCGA-D9-A4Z2-01A TCGA-ER-A2NH-06A TCGA-BF-A5EO-01A TCGA-D9-A6EA-06A
TSPAN6          12.107871        10.504819        10.055282        10.642052
TNMD             0.000000         1.584963         0.000000         1.584963
DPM1            10.992938        10.378295         8.945444        11.236014
SCYL3           10.126704         9.121534         7.930737         9.562242
C1orf112         9.861087         8.629357         7.918863         9.221587
         TCGA-D9-A4Z3-01A
TSPAN6          10.516685
TNMD             0.000000
DPM1             9.981567
SCYL3            8.754888
C1orf112         8.854868
head(SKCM_exp[["normal_tcga_data"]], n = 10) # Show the first 10 rows without the [1:5, 1:5] subset, because this table has only one column.
         TCGA-GN-A4U8-11A
TSPAN6           8.344296
TNMD             1.000000
DPM1            11.171177
SCYL3           10.159871
C1orf112         9.250298
FGR             11.024447
CFH             11.387479
FUCA2           11.595724
GCLC            10.848623
NFYA            11.457381

2.3.2 BRCA-Breast-TCGA

BRCA_exp <- get_tcga_exp(
  counts_file_path = "../test_TransProR/download_data/TCGA-BRCA.htseq_counts.tsv", 
  gene_probes_file_path = "../test_TransProR/download_data/TCGA_gencode.v22.annotation.gene.probeMap",
  phenotype_file_path = "../test_TransProR/download_data/TCGA-BRCA.GDC_phenotype.tsv", 
  output_file_path = '../test_TransProR/generated_data1/BRCA_Breast_TCGA_exp.rds'
)
Number of 'Primary Tumor' or 'Metastatic' samples:  1121 
Number of 'normal' samples: 162 
head(BRCA_exp[["tumor_tcga_data"]])[1:5, 1:5]
         TCGA-A2-A0CY-01A TCGA-B6-A40B-01A TCGA-AO-A0J8-01A TCGA-A8-A08J-01A
TSPAN6           11.62890        11.924813        11.921469        11.209453
TNMD              0.00000         5.392317         5.087463         3.000000
DPM1             10.29921        11.054604        11.068106        12.284246
SCYL3            10.13443        11.045077        12.086800        11.473706
C1orf112          8.80090         9.413628        10.357552         8.988685
         TCGA-E2-A14N-01A
TSPAN6          11.400346
TNMD             3.584963
DPM1            11.253257
SCYL3            9.873444
C1orf112        10.255029
head(BRCA_exp[["normal_tcga_data"]])[1:5, 1:5]
         TCGA-BH-A1F0-11B TCGA-BH-A0BZ-11A TCGA-AC-A2FM-11B TCGA-BH-A0HA-11A
TSPAN6          12.782998        12.245256        11.983706        12.296916
TNMD             9.766529        12.024101         8.693487         7.882643
DPM1            11.373409        10.696098        10.894818        10.788718
SCYL3           10.772315        10.560333         9.539159        11.275543
C1orf112         8.700440         8.299208         7.721099         8.724514
         TCGA-BH-A1FU-11A
TSPAN6          11.288289
TNMD             6.942515
DPM1            10.023754
SCYL3           10.309476
C1orf112         8.290019

2.3.3 LGG-Brain-TCGA

LGG_exp <- get_tcga_exp(
  counts_file_path = "../test_TransProR/download_data/TCGA-LGG.htseq_counts.tsv", 
  gene_probes_file_path = "../test_TransProR/download_data/TCGA_gencode.v22.annotation.gene.probeMap",
  phenotype_file_path = "../test_TransProR/download_data/TCGA-LGG.GDC_phenotype.tsv", 
  output_file_path = '../test_TransProR/generated_data1/LGG_Brain_TCGA_exp.rds'
)
Number of 'Primary Tumor' or 'Metastatic' samples:  516 
Number of 'normal' samples: 2 
head(LGG_exp[["tumor_tcga_data"]])[1:5, 1:5]
         TCGA-VM-A8C8-01A TCGA-VV-A829-01A TCGA-DH-5141-01A TCGA-RY-A840-01A
TSPAN6          10.797662        11.439311        11.856815        11.157978
TNMD             0.000000         1.584963         4.247928         1.000000
DPM1            10.036174        10.108524        10.130571         9.896332
SCYL3            8.810572         9.566054         9.030667         9.147205
C1orf112         7.607330         8.118941         6.845490         7.285402
         TCGA-DB-A64V-01A
TSPAN6          11.401946
TNMD             2.000000
DPM1            10.193525
SCYL3            9.105909
C1orf112         7.584963
LGG_exp[["normal_tcga_data"]]
data frame with 0 columns and 58387 rows

2.3.4 THCA-Thyroid-TCGA

THCA_exp <- get_tcga_exp(
  counts_file_path = "../test_TransProR/download_data/TCGA-THCA.htseq_counts.tsv", 
  gene_probes_file_path = "../test_TransProR/download_data/TCGA_gencode.v22.annotation.gene.probeMap",
  phenotype_file_path = "../test_TransProR/download_data/TCGA-THCA.GDC_phenotype.tsv", 
  output_file_path = '../test_TransProR/generated_data1/THCA_Thyroid_TCGA_exp.rds'
)
Number of 'Primary Tumor' or 'Metastatic' samples:  515 
Number of 'normal' samples: 100 
head(THCA_exp[["tumor_tcga_data"]])[1:5, 1:5]
         TCGA-BJ-A28W-01A TCGA-EM-A1CT-01A TCGA-EL-A4JZ-01A TCGA-EL-A3CT-01A
TSPAN6          10.029287        10.314017        11.273796        12.054943
TNMD             2.000000         0.000000         1.000000         3.000000
DPM1            10.148477         9.485829        10.704768        10.770664
SCYL3            8.820179         9.287712         9.594325         9.092757
C1orf112         6.954196         7.383704         7.820179         7.629357
         TCGA-ET-A2MY-01A
TSPAN6          11.271463
TNMD             0.000000
DPM1            10.754052
SCYL3           10.034799
C1orf112         7.912889
head(THCA_exp[["normal_tcga_data"]])[1:5, 1:5]
         TCGA-BJ-A28W-11A TCGA-EM-A1CT-11A TCGA-EL-A3MX-11A TCGA-DO-A1JZ-11A
TSPAN6           9.946906        12.236612        12.162706        11.880349
TNMD             1.000000         3.000000         3.459432         2.807355
DPM1             9.923327        11.420487        11.103943        10.531381
SCYL3            8.005625        10.169925        10.740624         9.812177
C1orf112         5.554589         7.768184         8.022368         6.977280
         TCGA-EL-A3TA-11A
TSPAN6          12.423116
TNMD             3.169925
DPM1            11.236612
SCYL3           10.586840
C1orf112         7.800900

2.4 Get GTEx Expression Data

2.4.1 Skin-SKCM-GTEx

Skin_SKCM_gtex <- get_gtex_exp(
  organ_specific = "Skin",
  file_path = "../test_TransProR/download_data/gtex_gene_expected_count", 
  probe_map_path = "../test_TransProR/download_data/gtex_probeMap_gencode.v23.annotation.gene.probemap",
  pheno_path = "../test_TransProR/download_data/GTEX_phenotype", 
  output_path = '../test_TransProR/generated_data1/Skin_SKCM_Gtex.rds'
)
Number of samples for Skin : 977 
head(Skin_SKCM_gtex)[1:5, 1:5]
         GTEX-111CU-1126-SM-5EGIM GTEX-111CU-1926-SM-5GZYZ
TSPAN6                     7.9715                   9.6935
TNMD                       3.3219                   6.4757
DPM1                       8.5078                   9.3619
SCYL3                      8.1741                   9.6454
C1orf112                   6.1930                   8.0506
         GTEX-111FC-0126-SM-5N9DL GTEX-111FC-2526-SM-5GZXU
TSPAN6                     8.0821                   8.8329
TNMD                       6.7944                   7.4838
DPM1                       9.6865                   9.0580
SCYL3                     10.0769                   9.2662
C1orf112                   7.9006                   7.5869
         GTEX-111VG-0008-SM-5Q5BG
TSPAN6                     8.7649
TNMD                       0.0000
DPM1                      10.0126
SCYL3                      7.9960
C1orf112                   8.1122

2.4.2 Brain-LGG-GTEx

Brain_LGG_gtex <- get_gtex_exp(
  organ_specific = "Brain",
  file_path = "../test_TransProR/download_data/gtex_gene_expected_count", 
  probe_map_path = "../test_TransProR/download_data/gtex_probeMap_gencode.v23.annotation.gene.probemap",
  pheno_path = "../test_TransProR/download_data/GTEX_phenotype", 
  output_path = '../test_TransProR/generated_data1/Brain_LGG_Gtex.rds'
)
Number of samples for Brain : 1426 
head(Brain_LGG_gtex)[1:5, 1:5]
         GTEX-1117F-3226-SM-5N9CT GTEX-111FC-3126-SM-5GZZ2
TSPAN6                     9.3354                   7.9189
TNMD                       2.0000                   2.5850
DPM1                       8.6475                   9.0471
SCYL3                      7.5673                   8.7225
C1orf112                   6.8245                   7.3181
         GTEX-111FC-3326-SM-5GZYV GTEX-1128S-2726-SM-5H12C
TSPAN6                     6.9307                   8.0389
TNMD                       0.0000                   2.0000
DPM1                       8.4757                   8.8392
SCYL3                      8.6133                   8.0100
C1orf112                   8.2355                   6.5732
         GTEX-1128S-2826-SM-5N9DI
TSPAN6                     5.8580
TNMD                       0.0000
DPM1                       8.2574
SCYL3                      7.7710
C1orf112                   7.9459

2.4.3 Breast-BRCA-GTEx

Breast_BRCA_gtex <- get_gtex_exp(
  organ_specific = "Breast",
  file_path = "../test_TransProR/download_data/gtex_gene_expected_count", 
  probe_map_path = "../test_TransProR/download_data/gtex_probeMap_gencode.v23.annotation.gene.probemap",
  pheno_path = "../test_TransProR/download_data/GTEX_phenotype", 
  output_path = '../test_TransProR/generated_data1/Breast_BRCA_Gtex.rds'
)
Number of samples for Breast : 221 
head(Breast_BRCA_gtex)[1:5, 1:5]
         GTEX-1117F-2826-SM-5GZXL GTEX-111YS-1926-SM-5GICC
TSPAN6                    11.1799                  11.3761
TNMD                       9.7465                   7.2854
DPM1                       9.3015                   9.7879
SCYL3                      9.0717                  10.1061
C1orf112                   7.8004                   8.3124
         GTEX-117XS-1926-SM-5GICO GTEX-117YX-1426-SM-5H12H
TSPAN6                    11.9826                  10.2842
TNMD                      10.3805                   6.9307
DPM1                       9.9099                   9.4367
SCYL3                      9.4578                   8.9093
C1orf112                   6.9977                   7.7427
         GTEX-1192X-2326-SM-5987X
TSPAN6                    11.0505
TNMD                       8.1849
DPM1                       9.9614
SCYL3                     10.0083
C1orf112                   8.7283

2.4.4 Thyroid-THCA-GTEx

Thyroid_THCA_gtex <- get_gtex_exp(
  organ_specific = "Thyroid",
  file_path = "../test_TransProR/download_data/gtex_gene_expected_count", 
  probe_map_path = "../test_TransProR/download_data/gtex_probeMap_gencode.v23.annotation.gene.probemap",
  pheno_path = "../test_TransProR/download_data/GTEX_phenotype", 
  output_path = '../test_TransProR/generated_data1/Thyroid_THCA_Gtex.rds'
)
Number of samples for Thyroid : 366 
head(Thyroid_THCA_gtex)[1:5, 1:5]
         GTEX-111CU-0226-SM-5GZXC GTEX-111FC-1026-SM-5GZX1
TSPAN6                    10.0553                  10.0701
TNMD                       2.0000                   4.3923
DPM1                      10.0993                   9.5774
SCYL3                      9.6425                   9.5058
C1orf112                   7.4736                   7.6001
         GTEX-111VG-0526-SM-5N9BW GTEX-111YS-0726-SM-5GZY8
TSPAN6                     8.9658                   9.1674
TNMD                       2.0000                   3.0000
DPM1                       9.2668                   9.3859
SCYL3                      8.9591                   9.2350
C1orf112                   7.6817                   7.5346
         GTEX-1122O-0226-SM-5N9DA
TSPAN6                     9.4533
TNMD                       2.8074
DPM1                       9.2668
SCYL3                      9.3993
C1orf112                   7.1872